

import pytesseract


def OcrImg(pic_list):
    """Run Chinese OCR on cropped ID-card field images and print the fields.

    Images are paired positionally with the field titles 姓名 (name),
    性别 (gender), 民族 (ethnicity), 出生日期 (date of birth), 地址 (address)
    and 身份证号码 (ID number). Each image is recognised with Tesseract's
    ``chi_sim`` model, stripped of all whitespace, and stored under its
    title. The gender and ethnicity fields are then reduced to their last
    character and a known misrecognition is corrected in each.

    Args:
        pic_list: Sequence of images accepted by
            ``pytesseract.image_to_string``, in the title order above.
            Fewer than six images is allowed (the missing fields are simply
            absent from the result); images beyond the sixth are ignored.

    Returns:
        dict: Field title -> cleaned text for every image processed. The
        same ``title：text`` pairs are also printed, one per line.
    """
    title_list = ["姓名", "性别", "民族", "出生日期", "地址", "身份证号码"]
    dic = {}

    # zip() stops at the shorter sequence, so neither a short nor an
    # over-long pic_list can cause an out-of-range lookup.
    for title, pic in zip(title_list, pic_list):
        text = pytesseract.image_to_string(pic, lang="chi_sim")
        # split()/join drops every whitespace character, not just ' ' and
        # '\n' -- this also covers tabs, '\r' and the trailing form feed
        # that some Tesseract versions append to their output.
        dic[title] = "".join(text.split())

    # Post-process the recognised text: (field, misread char, correct char).
    # NOTE(review): only the final character of each field is kept, as in
    # the original code -- presumably because the crop may include the
    # printed label. Multi-character ethnic group names would be truncated;
    # confirm against the cropping step.
    corrections = (("性别", "另", "男"), ("民族", "汊", "汉"))
    for title, wrong, right in corrections:
        text = dic.get(title)
        if not text:
            # Field not supplied, or OCR produced nothing: leave it alone
            # instead of failing on text[-1].
            continue
        dic[title] = text[-1].replace(wrong, right)

    for key, value in dic.items():
        print(key + '：' + value)

    return dic